#####################################
# Replication Material for 
# Stefan Müller and Michael Jankowski:
# Do voters really prefer more choice? Determinants of support for personalised electoral systems.
# Journal of Elections, Public Opinion and Parties.
#####################################

### Note: This script shows how we transformed and merged the variables
### from the four election studies. To run this script, you first need 
### to download the raw data. If you want to rerun our regression models
### and plots, proceed with file 02_analyse_hh_hb.do and 03_create_plots.R.

### Raw data for each election can be retrieved for free after
### registration on the GESIS website using the following links:
### Hamburg 2011: http://doi.org/10.4232/1.11466
### Hamburg 2015: http://doi.org/10.4232/1.12650
### Bremen 2011: http://doi.org/10.4232/1.11467
### Bremen 2015: http://doi.org/10.4232/1.12651


library(haven)
library(tidyverse)

############ DATA: HH 2011

# Load Data and IDs

# Read the raw Stata file of the Hamburg 2011 study and tag every row with
# the election it belongs to.
hh_2011 <- read_dta("ZA5623_v1-0-0.dta")
hh_2011$election <- "hh_2011"

# The ID construction below gives each consecutive run of five rows the same
# respondent ID, so the row count has to be a multiple of five. Fail early
# with a clear message instead of a length-mismatch error in the assignment.
# NOTE(review): presumably the raw file holds five rows per respondent --
# confirm against the codebook.
if (nrow(hh_2011) %% 5 != 0) {
  stop("hh_2011: number of rows is not a multiple of 5", call. = FALSE)
}

# seq_len() instead of 1:n so that an empty file yields an empty ID vector
hh_2011$resp_id <- paste0(
  "hh_2011_",
  rep(seq_len(nrow(hh_2011) / 5), each = 5)
)

# Keep only the first row of every respondent and report the remaining n
hh_2011 <- subset(hh_2011, !duplicated(resp_id))
nrow(hh_2011)

# Derive the harmonised analysis variables from the raw survey items.
# Every item is converted with as.numeric(); the party ratings (rate_*) are
# additionally shifted down by one. The "_bund" ratings come from the
# odd-lettered v7 items (a, c, e, g, i), the plain ratings from the
# even-lettered ones (b, d, f, h, j).
hh_2011 <- hh_2011 %>%
  dplyr::mutate(
    nonvoter = as.numeric(v3a),
    splitting = as.numeric(v3c),
    int_wahl = as.numeric(v3h),
    rate_cdu = as.numeric(v7b) - 1,
    rate_spd = as.numeric(v7d) - 1,
    rate_green = as.numeric(v7f) - 1,
    rate_left = as.numeric(v7h) - 1,
    rate_fdp = as.numeric(v7j) - 1,
    pol_int = as.numeric(v11),
    pref_law = as.numeric(v38),
    complex_law = as.numeric(v39),
    age_group = as.numeric(vb),
    education = as.numeric(vf),
    university = as.numeric(vg),
    party_id = as.numeric(vs),
    party_id_local = as.numeric(v10a),
    gender = as.numeric(va),
    # No fine-grained age variable is filled in for this study.
    # NOTE(review): presumably because vb uses a different category scheme
    # here than in the other three studies -- confirm.
    age_fine = NA,
    rate_cdu_bund = as.numeric(v7a) - 1,
    rate_spd_bund = as.numeric(v7c) - 1,
    rate_green_bund = as.numeric(v7e) - 1,
    rate_left_bund = as.numeric(v7g) - 1,
    rate_fdp_bund = as.numeric(v7i) - 1,
    last_election = as.numeric(v4)
  )

## Winner/loser status derived from last_election:
## codes 1 and 3 -> "Winner", codes 8 and above -> "Does not apply",
## every other code -> "Loser". Missing values stay missing.
hh_2011 <- hh_2011 %>%
  dplyr::mutate(
    winner = dplyr::case_when(
      is.na(last_election) ~ NA_character_,
      last_election %in% c(1, 3) ~ "Winner",
      last_election >= 8 ~ "Does not apply",
      TRUE ~ "Loser"
    )
  )

## Map the numeric local party ID onto party names; the numeric codes are
## not the same across elections, so each study has its own mapping.
hh_2011$party_id_local <- car::recode(
  hh_2011$party_id_local,
  "1='CDU'; 2='SPD'; 3='Grüne';
                                      4='Linke';5='FDP'; 6:7='Andere';8='Keine'"
)


# Political knowledge (knowing the names of the main CDU and SPD candidates):
# each candidate item becomes 1 if it is coded 1 and 0 otherwise, the two
# indicators are summed, and a sum of 2 is labelled "high level".
hh_2011 <- dplyr::mutate(
  hh_2011,
  CDUcandidate = car::recode(v8a, "1=1; else=0"),
  SPDcandidate = car::recode(v8b, "1=1; else=0"),
  pol_knowledge = CDUcandidate + SPDcandidate,
  pol_knowledge_dummy = as.factor(
    car::recode(pol_knowledge, "2='high level'; else='low level'")
  )
)

# Collapse the numeric age codes into six labelled brackets; codes above 10
# and missing values end up as NA because no condition matches them.
hh_2011 <- dplyr::mutate(
  hh_2011,
  age_group = dplyr::case_when(
    age_group <= 3 ~ "<30",
    age_group %in% c(4, 5) ~ "30-39",
    age_group %in% c(6, 7) ~ "40-49",
    age_group == 8 ~ "50-59",
    age_group == 9 ~ "60-69",
    age_group == 10 ~ "70+"
  )
)

# Reduce the data to the harmonised variables, in the column order shared by
# all four studies.
hh_2011 <- hh_2011 %>%
  dplyr::select(
    election,
    nonvoter, splitting, int_wahl,
    rate_cdu, rate_spd, rate_green, rate_left, rate_fdp,
    pol_int, pref_law, complex_law,
    age_group, education, university,
    party_id, party_id_local,
    pol_knowledge, pol_knowledge_dummy,
    gender, age_fine,
    rate_cdu_bund, rate_spd_bund, rate_green_bund,
    rate_left_bund, rate_fdp_bund,
    winner
  )

############ DATA: HH 2015

# Load Data and IDs

# Read the raw Stata file of the Hamburg 2015 study and tag every row with
# the election it belongs to.
hh_2015 <- read_dta("ZA6698_v1-0-0.dta")
hh_2015$election <- "hh_2015"

# The ID construction below gives each consecutive run of five rows the same
# respondent ID, so the row count has to be a multiple of five. Fail early
# with a clear message instead of a length-mismatch error in the assignment.
# NOTE(review): presumably the raw file holds five rows per respondent --
# confirm against the codebook.
if (nrow(hh_2015) %% 5 != 0) {
  stop("hh_2015: number of rows is not a multiple of 5", call. = FALSE)
}

# seq_len() instead of 1:n so that an empty file yields an empty ID vector
hh_2015$resp_id <- paste0(
  "hh_2015_",
  rep(seq_len(nrow(hh_2015) / 5), each = 5)
)

# Keep only the first row of every respondent and report the remaining n
hh_2015 <- subset(hh_2015, !duplicated(resp_id))
nrow(hh_2015)

# Lower-case all column names so the items can be addressed as v3a, vb, ...
names(hh_2015) <- tolower(names(hh_2015))

# Derive the harmonised analysis variables from the raw survey items.
# Every item is converted with as.numeric(); the party ratings (rate_*) are
# additionally shifted down by one. Note that the assignment of v7 items to
# parties in this study is SPD, CDU, Left, FDP, Greens, AfD.
hh_2015 <- hh_2015 %>%
  dplyr::mutate(
    nonvoter = as.numeric(v3a),
    splitting = as.numeric(v3c),
    int_wahl = as.numeric(v3h),
    rate_spd = as.numeric(v7b) - 1,
    rate_cdu = as.numeric(v7d) - 1,
    rate_left = as.numeric(v7f) - 1,
    rate_fdp = as.numeric(v7h) - 1,
    rate_green = as.numeric(v7j) - 1,
    # NOTE(review): rate_afd is created here but is not among the columns
    # selected further down in the script.
    rate_afd = as.numeric(v7l) - 1,
    pol_int = as.numeric(v12),
    pref_law = as.numeric(v43),
    complex_law = as.numeric(v44),
    age_group = as.numeric(vb),
    education = as.numeric(vf),
    university = as.numeric(vg),
    party_id = as.numeric(vs),
    party_id_local = as.numeric(v11a),
    gender = as.numeric(va),
    # age_fine keeps the unrecoded vb codes; age_group is collapsed later
    age_fine = as.numeric(vb),
    rate_spd_bund = as.numeric(v7a) - 1,
    rate_cdu_bund = as.numeric(v7c) - 1,
    rate_left_bund = as.numeric(v7e) - 1,
    rate_fdp_bund = as.numeric(v7g) - 1,
    rate_green_bund = as.numeric(v7i) - 1,
    last_election = as.numeric(v4)
  )

## Winner/loser status derived from last_election:
## code 1 -> "Winner", codes 9 and above -> "Does not apply",
## every other code -> "Loser". Missing values stay missing.
hh_2015 <- hh_2015 %>%
  dplyr::mutate(
    winner = dplyr::case_when(
      is.na(last_election) ~ NA_character_,
      last_election == 1 ~ "Winner",
      last_election >= 9 ~ "Does not apply",
      TRUE ~ "Loser"
    )
  )

## Map the numeric local party ID onto party names; the numeric codes are
## not the same across elections, so each study has its own mapping.
hh_2015$party_id_local <- car::recode(
  hh_2015$party_id_local,
  "1='SPD'; 2='CDU'; 3='Linke';
                                      4='FDP';5='Grüne'; 6:8='Andere';9='Keine'"
)


## Political knowledge (knowing the names of the main CDU and SPD candidates):
## each candidate item becomes 1 if it is coded 1 and 0 otherwise, the two
## indicators are summed, and a sum of 2 is labelled "high level".
hh_2015 <- dplyr::mutate(
  hh_2015,
  CDUcandidate = car::recode(v9b, "1=1; else=0"),
  SPDcandidate = car::recode(v9a, "1=1; else=0"),
  pol_knowledge = CDUcandidate + SPDcandidate,
  pol_knowledge_dummy = as.factor(
    car::recode(pol_knowledge, "2='high level'; else='low level'")
  )
)

## Collapse the numeric age codes into six labelled brackets; codes above 11
## and missing values end up as NA because no condition matches them.
hh_2015 <- dplyr::mutate(
  hh_2015,
  age_group = dplyr::case_when(
    age_group <= 4 ~ "<30",
    age_group %in% c(5, 6) ~ "30-39",
    age_group %in% c(7, 8) ~ "40-49",
    age_group == 9 ~ "50-59",
    age_group == 10 ~ "60-69",
    age_group == 11 ~ "70+"
  )
)


# Reduce the data to the harmonised variables, in the column order shared by
# all four studies.
hh_2015 <- hh_2015 %>%
  dplyr::select(
    election,
    nonvoter, splitting, int_wahl,
    rate_cdu, rate_spd, rate_green, rate_left, rate_fdp,
    pol_int, pref_law, complex_law,
    age_group, education, university,
    party_id, party_id_local,
    pol_knowledge, pol_knowledge_dummy,
    gender, age_fine,
    rate_cdu_bund, rate_spd_bund, rate_green_bund,
    rate_left_bund, rate_fdp_bund,
    winner
  )


############ DATA: HB 2011

# Load Data and IDs

# Read the raw Stata file of the Bremen 2011 study and tag every row with
# the election it belongs to.
hb_2011 <- read_dta("ZA5627_v1-0-0.dta")
hb_2011$election <- "hb_2011"

# The ID construction below gives each consecutive run of five rows the same
# respondent ID, so the row count has to be a multiple of five. Fail early
# with a clear message instead of a length-mismatch error in the assignment.
# NOTE(review): presumably the raw file holds five rows per respondent --
# confirm against the codebook.
if (nrow(hb_2011) %% 5 != 0) {
  stop("hb_2011: number of rows is not a multiple of 5", call. = FALSE)
}

# seq_len() instead of 1:n so that an empty file yields an empty ID vector
hb_2011$resp_id <- paste0(
  "hb_2011_",
  rep(seq_len(nrow(hb_2011) / 5), each = 5)
)

# Keep only the first row of every respondent and report the remaining n
hb_2011 <- subset(hb_2011, !duplicated(resp_id))
nrow(hb_2011)

# Derive the harmonised analysis variables from the raw survey items.
# Every item is converted with as.numeric(); the party ratings (rate_*) are
# additionally shifted down by one. Note that the assignment of v7 items to
# parties in this study is SPD, CDU, Greens, Left, FDP.
hb_2011 <- hb_2011 %>%
  dplyr::mutate(
    nonvoter = as.numeric(v3a),
    splitting = as.numeric(v3c),
    int_wahl = as.numeric(v3h),
    rate_spd = as.numeric(v7b) - 1,
    rate_cdu = as.numeric(v7d) - 1,
    rate_green = as.numeric(v7f) - 1,
    rate_left = as.numeric(v7h) - 1,
    rate_fdp = as.numeric(v7j) - 1,
    pol_int = as.numeric(v12),
    pref_law = as.numeric(v36),
    complex_law = as.numeric(v37),
    age_group = as.numeric(vb),
    education = as.numeric(vf),
    university = as.numeric(vg),
    party_id = as.numeric(vs),
    party_id_local = as.numeric(v11a),
    gender = as.numeric(va),
    # age_fine keeps the unrecoded vb codes; age_group is collapsed later
    age_fine = as.numeric(vb),
    rate_spd_bund = as.numeric(v7a) - 1,
    rate_cdu_bund = as.numeric(v7c) - 1,
    rate_green_bund = as.numeric(v7e) - 1,
    rate_left_bund = as.numeric(v7g) - 1,
    rate_fdp_bund = as.numeric(v7i) - 1,
    last_election = as.numeric(v4)
  )


## Winner/loser status derived from last_election:
## codes 1 and 3 -> "Winner", codes 9 and above -> "Does not apply",
## every other code -> "Loser". Missing values stay missing.
hb_2011 <- hb_2011 %>%
  dplyr::mutate(
    winner = dplyr::case_when(
      is.na(last_election) ~ NA_character_,
      last_election %in% c(1, 3) ~ "Winner",
      last_election >= 9 ~ "Does not apply",
      TRUE ~ "Loser"
    )
  )

## Map the numeric local party ID onto party names; the numeric codes are
## not the same across elections, so each study has its own mapping.
hb_2011$party_id_local <- car::recode(
  hb_2011$party_id_local,
  "1='SPD'; 2='CDU'; 3='Grüne';
                                      4='Linke';5='FDP'; 6='Andere';7='Keine'"
)


## Political knowledge (knowing the names of the main CDU and SPD candidates):
## each candidate item becomes 1 if it is coded 1 and 0 otherwise, the two
## indicators are summed, and a sum of 2 is labelled "high level".
hb_2011 <- dplyr::mutate(
  hb_2011,
  CDUcandidate = car::recode(v9b, "1=1; else=0"),
  SPDcandidate = car::recode(v9a, "1=1; else=0"),
  pol_knowledge = CDUcandidate + SPDcandidate,
  pol_knowledge_dummy = as.factor(
    car::recode(pol_knowledge, "2='high level'; else='low level'")
  )
)

## Collapse the numeric age codes into six labelled brackets; codes above 11
## and missing values end up as NA because no condition matches them.
hb_2011 <- dplyr::mutate(
  hb_2011,
  age_group = dplyr::case_when(
    age_group <= 4 ~ "<30",
    age_group %in% c(5, 6) ~ "30-39",
    age_group %in% c(7, 8) ~ "40-49",
    age_group == 9 ~ "50-59",
    age_group == 10 ~ "60-69",
    age_group == 11 ~ "70+"
  )
)

# Reduce the data to the harmonised variables, in the column order shared by
# all four studies.
hb_2011 <- hb_2011 %>%
  dplyr::select(
    election,
    nonvoter, splitting, int_wahl,
    rate_cdu, rate_spd, rate_green, rate_left, rate_fdp,
    pol_int, pref_law, complex_law,
    age_group, education, university,
    party_id, party_id_local,
    pol_knowledge, pol_knowledge_dummy,
    gender, age_fine,
    rate_cdu_bund, rate_spd_bund, rate_green_bund,
    rate_left_bund, rate_fdp_bund,
    winner
  )

############ DATA: HB 2015

# Load Data and IDs

# Read the raw Stata file of the Bremen 2015 study and tag every row with
# the election it belongs to.
hb_2015 <- read_dta("ZA6699_v1-0-0.dta")
hb_2015$election <- "hb_2015"

# The ID construction below gives each consecutive run of five rows the same
# respondent ID, so the row count has to be a multiple of five. Fail early
# with a clear message instead of a length-mismatch error in the assignment.
# NOTE(review): presumably the raw file holds five rows per respondent --
# confirm against the codebook.
if (nrow(hb_2015) %% 5 != 0) {
  stop("hb_2015: number of rows is not a multiple of 5", call. = FALSE)
}

# seq_len() instead of 1:n so that an empty file yields an empty ID vector
hb_2015$resp_id <- paste0(
  "hb_2015_",
  rep(seq_len(nrow(hb_2015) / 5), each = 5)
)

# Keep only the first row of every respondent and report the remaining n
hb_2015 <- subset(hb_2015, !duplicated(resp_id))
nrow(hb_2015)

# Lower-case all column names so the items can be addressed as v3a, vb, ...
names(hb_2015) <- tolower(names(hb_2015))

# Derive the harmonised analysis variables from the raw survey items.
# Every item is converted with as.numeric(); the party ratings (rate_*) are
# additionally shifted down by one. Note that the assignment of v7 items to
# parties in this study is SPD, Greens, CDU, Left, FDP, AfD.
hb_2015 <- hb_2015 %>%
  dplyr::mutate(
    nonvoter = as.numeric(v3a),
    splitting = as.numeric(v3c),
    int_wahl = as.numeric(v3h),
    rate_spd = as.numeric(v7b) - 1,
    rate_green = as.numeric(v7d) - 1,
    rate_cdu = as.numeric(v7f) - 1,
    rate_left = as.numeric(v7h) - 1,
    rate_fdp = as.numeric(v7j) - 1,
    # NOTE(review): rate_afd and rate_afd_bund are created here but are not
    # among the columns selected further down in the script.
    rate_afd = as.numeric(v7l) - 1,
    pol_int = as.numeric(v12),
    pref_law = as.numeric(v42),
    complex_law = as.numeric(v43),
    age_group = as.numeric(vb),
    education = as.numeric(vf),
    university = as.numeric(vg),
    party_id = as.numeric(vs),
    party_id_local = as.numeric(v11a),
    gender = as.numeric(va),
    # age_fine keeps the unrecoded vb codes; age_group is collapsed later
    age_fine = as.numeric(vb),
    rate_spd_bund = as.numeric(v7a) - 1,
    rate_green_bund = as.numeric(v7c) - 1,
    rate_cdu_bund = as.numeric(v7e) - 1,
    rate_left_bund = as.numeric(v7g) - 1,
    rate_fdp_bund = as.numeric(v7i) - 1,
    rate_afd_bund = as.numeric(v7k) - 1,
    last_election = as.numeric(v4)
  )

## Winner/loser status derived from last_election:
## codes 1 and 2 -> "Winner", codes 10 and above -> "Does not apply",
## every other code -> "Loser". Missing values stay missing.
hb_2015 <- hb_2015 %>%
  dplyr::mutate(
    winner = dplyr::case_when(
      is.na(last_election) ~ NA_character_,
      last_election %in% c(1, 2) ~ "Winner",
      last_election >= 10 ~ "Does not apply",
      TRUE ~ "Loser"
    )
  )

## Map the numeric local party ID onto party names; the numeric codes are
## not the same across elections, so each study has its own mapping.
## NOTE(review): code 5 maps to 'Andere' and code 6 to 'FDP' here -- confirm
## against the codebook.
hb_2015$party_id_local <- car::recode(
  hb_2015$party_id_local,
  "1='SPD'; 2='Grüne'; 3='CDU';
                                      4='Linke';5='Andere'; 6='FDP';7='Andere';8='Keine'"
)


## Political knowledge (knowing the names of the main CDU and SPD candidates):
## each candidate item becomes 1 if it is coded 1 and 0 otherwise, the two
## indicators are summed, and a sum of 2 is labelled "high level".
hb_2015 <- dplyr::mutate(
  hb_2015,
  CDUcandidate = car::recode(v9b, "1=1; else=0"),
  SPDcandidate = car::recode(v9a, "1=1; else=0"),
  pol_knowledge = CDUcandidate + SPDcandidate,
  pol_knowledge_dummy = as.factor(
    car::recode(pol_knowledge, "2='high level'; else='low level'")
  )
)

## Collapse the numeric age codes into six labelled brackets; codes above 11
## and missing values end up as NA because no condition matches them.
hb_2015 <- dplyr::mutate(
  hb_2015,
  age_group = dplyr::case_when(
    age_group <= 4 ~ "<30",
    age_group %in% c(5, 6) ~ "30-39",
    age_group %in% c(7, 8) ~ "40-49",
    age_group == 9 ~ "50-59",
    age_group == 10 ~ "60-69",
    age_group == 11 ~ "70+"
  )
)


# Reduce the data to the harmonised variables, in the column order shared by
# all four studies.
hb_2015 <- hb_2015 %>%
  dplyr::select(
    election,
    nonvoter, splitting, int_wahl,
    rate_cdu, rate_spd, rate_green, rate_left, rate_fdp,
    pol_int, pref_law, complex_law,
    age_group, education, university,
    party_id, party_id_local,
    pol_knowledge, pol_knowledge_dummy,
    gender, age_fine,
    rate_cdu_bund, rate_spd_bund, rate_green_bund,
    rate_left_bund, rate_fdp_bund,
    winner
  )

#########################################################
# Append Data
#########################################################

# Stack the four harmonised studies into one data frame.
df <- dplyr::bind_rows(hh_2011, hb_2011, hh_2015, hb_2015)

# Adjust vars: set "no answer" / "not shown" codes to NA and collapse
# categories. The order of the statements matters wherever a later line
# relies on values written by an earlier one.

# nonvoter ends up as 0 = voter, 1 = non-voter
df$nonvoter[df$nonvoter == 6] <- NA # 6 = "NA"
df$nonvoter[df$nonvoter == 1 | df$nonvoter == 5] <- 0 # 1 = will vote, 5 = already voted by mail
df$nonvoter[df$nonvoter >= 2 & df$nonvoter <= 4] <- 1 # 2-4 = "probably" --> NON VOTER?

# splitting ends up as 0 = nonvoter, 1 = non-splitter, 2 = splitter
df$splitting[df$splitting > 2 | df$splitting == 0] <- NA # 3 = "NA", 4 = "will cast an invalid vote"
df$splitting[df$nonvoter == 1] <- 0

df$int_wahl[df$int_wahl == 5] <- NA # 5 = "NA", 1 - 4 = strong ... no interest

# Set code 12 to NA in every party-rating column. This is done with base R
# so the step needs nothing beyond the packages attached at the top of the
# script (magrittr's %<>% is not attached by library(tidyverse)), and with
# lapply() so each column stays a plain vector instead of the data frame
# being routed through a matrix by apply().
# NOTE(review): the rate_ columns were already shifted by -1 when they were
# extracted from the raw items, so check that 12 (rather than 11) is the
# value that marks missing answers after the shift.
rate_cols <- grep("rate_", names(df), value = TRUE)
df[rate_cols] <- lapply(df[rate_cols], function(x) {
  x[x %in% 12] <- NA
  x
})

df$pol_int[df$pol_int == 6] <- NA # 6 = NA
df$pref_law[df$pref_law == 4] <- NA  # 4 = NA
df$pref_law[df$pref_law == 0] <- NA  # 0 = Not Shown
df$complex_law[df$complex_law == 0] <- NA # 0 = Not Shown
df$complex_law[df$complex_law == 3] <- NA # 3 = NA
df$education[df$education == 6] <- NA # 6 = NA
df$university[df$university == 3] <- NA # 3 = NA
df$party_id[df$party_id == 12] <- NA # 12 = NA
# Code 11 is merged into code 10 (it is NOT set to NA).
# NOTE(review): the original comment on this line read "11 = NA", which does
# not match what the code does -- confirm which of the two is intended.
df$party_id[df$party_id == 11] <- 10
df$gender <- df$gender - 1 # gender 0/1
df$age_fine[df$age_fine == 12] <- NA # NOTE(review): presumably 12 = NA -- confirm

# Add labelled versions of the law preference and the election, plus
# separate state and year variables.
df <- df %>%
  mutate(pref_law_factor = ifelse(pref_law == 1, "Approval",
                                  ifelse(pref_law == 2, "Disapproval", "Indifference"))) %>%
  mutate(election_factor = ifelse(election == "hh_2011", "Hamburg 2011",
                                  ifelse(election == "hh_2015", "Hamburg 2015",
                                         ifelse(election == "hb_2011", "Bremen 2011", "Bremen 2015")))) %>%
  mutate(state = ifelse(election_factor %in% c("Hamburg 2011", "Hamburg 2015"), "Hamburg", "Bremen")) %>%
  mutate(year = ifelse(election_factor %in% c("Hamburg 2011", "Bremen 2011"), "2011", "2015"))


# Export the merged data set as a Stata 13 file
write_dta(df, "hh_hb_11_15_prepared.dta", version = 13)

